package com.ghca.myinfo.spider.processor;

import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.handler.PatternProcessor;
import us.codecraft.webmagic.handler.RequestMatcher;
import us.codecraft.webmagic.handler.SubPageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * Sub-page processor for the listing page at {@link #URL}.
 *
 * <p>It handles only requests whose URL is exactly {@link #URL}. For such a page it
 * collects the links found inside {@code div.text_cont} elements that look like
 * project detail pages and queues them as further requests to crawl.
 *
 * <p>Created by Administrator on 2017/1/22.
 */
@Component
public class SCZBBXListProcessor implements SubPageProcessor {

    /** The only URL this processor accepts; compared by exact string equality. */
    private static final String URL = "https://www.sczbbx.com/GCJS/bxgg.aspx";

    /**
     * Pattern a link must match to be queued. The dot before {@code aspx} and the
     * question mark are escaped so that both are matched literally.
     */
    private static final String DETAIL_LINK_REGEX = ".*/Project/ShowProject\\.aspx\\?pid=.*";

    /**
     * Tells whether this processor is responsible for the given request.
     *
     * @param request the request being dispatched
     * @return {@code true} if the request URL equals {@link #URL} exactly; a
     *         {@code null} URL yields {@code false}
     */
    @Override
    public boolean match(Request request) {
        return URL.equals(request.getUrl());
    }

    /**
     * Extracts the detail-page links from the listing page and adds them to the
     * page's target requests.
     *
     * @param page the downloaded listing page
     * @return always {@link MatchOther#NO}
     */
    @Override
    public MatchOther processPage(Page page) {
        List<String> detailLinks = page.getHtml()
                .$("div.text_cont")
                .links()
                .regex(DETAIL_LINK_REGEX)
                .all();
        page.addTargetRequests(detailLinks);
        // NOTE(review): NO presumably stops the dispatcher from trying further
        // sub-processors for this page — confirm against the WebMagic handler docs.
        return MatchOther.NO;
    }
}
